# --- Imports and GPU setup ---
import tensorflow as tf
import glob
import os
import numpy as np
import PIL.Image  # `import PIL` alone does not load the PIL.Image submodule used throughout this file
import matplotlib.pyplot as plt

# List physical GPUs visible to TensorFlow.
gpus = tf.config.experimental.list_physical_devices('GPU')
# Force device initialization by materializing a tensor on the GPU.
# NOTE(review): EagerTensor.gpu() is deprecated in TF2 (use tf.device / identity
# placement instead) — kept for parity with the original behavior.
tf.zeros((1, 1)).gpu()
# Show two sample images from every training class directory.
for class_idx, class_dir in enumerate(glob.glob('data/imagenette2-160/train/**')):
    print(*['-']*20, f"\n-- Class : {os.path.basename(class_dir)}")
    _, axs = plt.subplots(1, 2)
    image_paths = glob.glob(f'{class_dir}/*.JPEG')
    for col in range(2):
        sample = PIL.Image.open(image_paths[col])
        axs[col].imshow(sample)
        axs[col].set_title(os.path.basename(class_dir))
        axs[col].axis("off")
    plt.show()
# Demonstrate individual augmentations (shift, rotation, horizontal flip)
# on the first image of each training class.
for i, c in enumerate(glob.glob('data/imagenette2-160/train/**')):
    print(f"-- Class : {os.path.basename(c)}")
    im_size = 3
    cols = 4
    _, axs = plt.subplots(1, cols, figsize=(im_size*cols, im_size))
    im = glob.glob(f'{c}/*.JPEG')[0]
    arr = np.array(PIL.Image.open(im), dtype='ubyte')
    # Original image for reference.
    axs[0].imshow(arr)
    axs[0].set_title("Original")
    # Random shift of up to 50% in width and height.
    # Positional args after the ranges are row_axis=0, col_axis=1, channel_axis=2.
    axs[1].imshow(tf.keras.preprocessing.image.random_shift(
        arr, 0.5, 0.5, 0, 1, 2,
        # 'reflect'
    ))
    axs[1].set_title("Height and Width Shift")  # fixed typo: was "Hieght"
    # Random rotation anywhere in [0, 360] degrees.
    axs[2].imshow(tf.keras.preprocessing.image.random_rotation(
        arr, 360, 0, 1, 2,
        # 'reflect'
    ))
    axs[2].set_title("Rotation")
    # Deterministic left-right flip via tf.image.
    axs[3].imshow(tf.image.flip_left_right(arr).numpy())
    axs[3].set_title("Horizontal Flip")
    # Hide axes on every subplot.
    [ax.axis("off") for ax in axs]
    plt.show()
def norm(arr):
    """Convert an array with range [0, 255] to approximately [-1, 1].

    Maps 127 -> 0.0 and 255 -> 1.0 (0 maps to -127/128, not exactly -1).
    Works elementwise on scalars and numpy arrays.
    """
    return (arr - 127)/128
def denorm(arr):
    """Convert an array with range [-1, 1] back to the [0, 255] scale.

    Inverse of `norm`: maps 0.0 -> 127 and 1.0 -> 255.
    Works elementwise on scalars and numpy arrays.
    """
    return (arr*128)+127
# Training data pipeline: heavy augmentation plus [-1, 1] normalization via `norm`.
_train_augmenter = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=360,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=norm,
    # rescale=1./255.
)
train_dl = _train_augmenter.flow_from_directory(
    'data/imagenette2-160/train',
    target_size=(160, 160)
)
# Validation data pipeline: no augmentation, only [-1, 1] normalization.
# shuffle=False is required: later analysis compares model.predict output
# row-by-row against valid_dl.classes / valid_dl.filepaths, which only line
# up when the iterator yields files in their fixed directory order.
valid_dl = tf.keras.preprocessing.image.ImageDataGenerator(
    # rescale=1./255.,
    preprocessing_function=norm,
).flow_from_directory(
    'data/imagenette2-160/val',
    target_size=(160, 160),
    shuffle=False,
)
# Pull one augmented batch from the training iterator to inspect shapes.
x_batch, y_batch = next(train_dl)
x_batch.shape  # (batch_size, height, width, channels)
X batch shape explanation: the image tensor is (batch_size, height, width, channels) — here 32 RGB images of 160×160.
y_batch.shape  # (batch_size, num_classes) — one-hot encoded labels
Y batch shape explanation: the label tensor is (batch_size, num_classes), one-hot encoded.
# Plot every image of the fetched batch, `cols` images per figure row,
# each titled with its class name recovered from the one-hot label.
i = 0
cols = 4
# Invert class_indices (name -> index) into index -> name.
index_to_class = {idx: name for name, idx in train_dl.class_indices.items()}
while i < x_batch.shape[0]:
    _, axs = plt.subplots(1, cols, figsize=(3*cols, 3))
    for ax in axs:
        if i >= x_batch.shape[0]:
            break
        image = denorm(x_batch[i]).astype('ubyte')  # back to displayable [0, 255]
        ax.imshow(image)
        ax.set_title(index_to_class[y_batch[i].argmax()])
        ax.axis("off")
        i += 1
    plt.show()
Sequential Model layers- Use AT LEAST 3 hidden layers with appropriate input for each. Choose the best number for hidden units and give reasons.
We are counting one block of convolution, activation function and batch norm layer as a hidden layer. We would like to use a small model with 4 hidden layers. After the hidden layers we would like to flatten the output and apply a dense layer on the average-pooled activation to reduce the dimensions of the output to that of the target predictions. We have not applied softmax activation to the top layer because we want to use logits directly in the loss function, as they seem to be more stable numerically. Reference: a-guide-to-transfer-learning-with-keras-using-resnet50
Add L2 regularization to all the layers.
Added
Add one layer of dropout at the appropriate position and give reasons.
We will add the dropout layer before all dense layers, as they are more prone to overfitting than the convolution layers. No dropout layer has been applied after the final predictions.
Choose the appropriate activation function for all the layers.
We would be using ReLU for all the hidden layers and finally a softmax layer at the end. The softmax is used at the end (also called the top) to scale the sum of confidence over all classes to 100%, i.e. 1.0 in floating point.
from tensorflow.keras import layers

# Weight regularizer shared by every Conv2D/Dense layer below.
# The original built tf.keras.regularizers.l2() and immediately overwrote it
# with None, so regularization was effectively disabled — kept disabled here.
reg = None  # swap in tf.keras.regularizers.l2() to enable L2 regularization

# Model v1: four "hidden layers", each a Conv2D -> BatchNorm -> ReLU block
# with stride 2 (spatial size 160 -> 80 -> 40 -> 20 -> 10), then
# Flatten -> Dropout -> Dense(128) -> BN -> ReLU -> Dropout -> Dense(classes).
# The top layer emits raw logits (no softmax) for numerical stability; the
# loss below is configured with from_logits=True accordingly.
model = tf.keras.Sequential([
    layers.Conv2D(16, 3, 2, padding='same', input_shape=x_batch.shape[1:], data_format="channels_last", kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(32, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(64, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(128, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Flatten(),
    layers.Dropout(0.2),  # dropout before the dense layers, which overfit most
    layers.Dense(128, kernel_regularizer=reg),  # regularized for consistency with the other layers
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Dropout(0.4),
    layers.Dense(len(train_dl.class_indices), kernel_regularizer=reg)
])
model.summary()
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.003),
    # Targets are one-hot multi-class labels (flow_from_directory's default
    # class_mode='categorical'), so categorical cross-entropy over logits is
    # the correct loss; BinaryCrossentropy scored each class independently.
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)
import time

# Train model v1 and report wall-clock time, then save it.
# fit_generator is deprecated in TF2: model.fit accepts generators directly.
start = time.time()
history = model.fit(
    train_dl,
    epochs=50,
    validation_data=valid_dl
)
f"-- Total Time Taken to train model: {(time.time() - start)/60:0.3f} Minutes"
model.save('v1_50e')
# Summarize the final epoch's train/validation loss and accuracy from the
# Keras History object returned by fit.
h = f"""
-- Final Model Metrics:
> Training Loss: {history.history['loss'][-1]:0.3f} | Validation Loss: {history.history['val_loss'][-1]:0.3f}
> Training Accuracy: {history.history['accuracy'][-1]:0.3f} | Validation Accuracy: {history.history['val_accuracy'][-1]:0.3f}
"""
print(h)
# Confusion matrix and classification report
# (plot_confusion_matrix was unused and has been removed from sklearn >= 1.2,
# so importing it would fail on current versions.)
from sklearn.metrics import classification_report, confusion_matrix

# predict_generator is deprecated in TF2; model.predict accepts generators.
# NOTE(review): rows of Y_pred only align with valid_dl.classes when the
# validation generator was created with shuffle=False — confirm.
Y_pred = model.predict(valid_dl)
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index per image
print('Confusion Matrix')
cm = confusion_matrix(valid_dl.classes, y_pred)
print(cm)
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
# Render the confusion matrix as an annotated heatmap, labeling both axes
# with the class (directory) names.
df_cm = pd.DataFrame(cm, valid_dl.class_indices.keys(), valid_dl.class_indices.keys())
plt.figure(figsize=(18,14))
sn.set(font_scale=1.4) # for label size
sn.heatmap(df_cm, cmap='viridis', annot=True, annot_kws={"size": 16}) # font size
plt.show()
# Per-class precision/recall/F1. Use keyword arguments instead of passing
# labels=None positionally, and materialize the dict view into a list for
# target_names.
rep = classification_report(valid_dl.classes, y_pred,
                            target_names=list(valid_dl.class_indices.keys()))
print(rep)
If we look at the above classification report:
# One-hot ground-truth matrix: row i holds a 1.0 in the column of image i's
# true class, 0.0 elsewhere (shape: (num_images, num_classes), float32).
conf_ground_truth = np.eye(valid_dl.num_classes, dtype='float32')[valid_dl.classes]
conf_ground_truth
def softmax(x: np.ndarray) -> np.ndarray:
    """Numerically stable softmax over the last axis of `x`.

    Subtracting the per-row maximum before exponentiating avoids overflow
    without changing the result.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=-1, keepdims=True)
# Per-image loss over the validation set, with no reduction so we get one
# loss value per image.
# NOTE(review): binary cross-entropy is applied to softmax probabilities here,
# mirroring the training loss choice; for single-label multi-class targets,
# categorical cross-entropy would be the conventional choice — confirm intent.
bce = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
Y_pred_sft = softmax(Y_pred)  # logits -> class probabilities
losses = bce(conf_ground_truth, Y_pred_sft).numpy()
argsrt_losses = losses.argsort()  # image indices sorted by ascending loss
# For each class, show the two highest-loss (worst-predicted) validation
# images next to the model's predicted class-probability bar chart.
for cn in valid_dl.class_indices:
    print(f"\n\n== Class : {cn}")
    cv = valid_dl.class_indices[cn]
    # Mask selecting, within the loss-sorted order, the images of this class.
    mask = valid_dl.classes[argsrt_losses] == cv
    # argsrt_losses is ascending, so [:-3:-1] walks it backwards and yields
    # the last two entries — i.e. the two largest losses for this class.
    for i, ii in enumerate(argsrt_losses[mask][:-3:-1]):
        print(f"-- Image: {valid_dl.filepaths[ii]}")
        _, axs = plt.subplots(1, 2, figsize=(12, 3))
        axs[0].imshow(PIL.Image.open(valid_dl.filepaths[ii]))
        axs[0].axis("off")
        # axs[0].set_title(valid_dl.filepaths[ii])
        #
        # axs[1].axis("off")
        # Bar colors: the true class is drawn red, all others blue.
        c = ['b']*valid_dl.num_classes
        c[valid_dl.classes[ii]] = 'r'
        axs[1].bar(list(valid_dl.class_indices.keys()), Y_pred_sft[ii], 1, color=c)
        plt.xticks(rotation=90)
        plt.show()
    print("".join(["="]*25))
#
Here we are doubling the batch size from the default size of 32 to 64. This would result in halving the steps per epoch as compared to batch size of 32.
# Double the batch size (default 32 -> 64), halving the steps per epoch.
# NOTE(review): mutating batch_size on an existing DirectoryIterator is
# fragile across Keras versions — recreating the generator with
# batch_size=64 is the robust approach; confirm this takes effect.
train_dl.batch_size = 64

# As in model v1, the l2() regularizer was created and immediately replaced
# with None in the original, i.e. regularization was OFF — kept OFF here.
reg = None  # swap in tf.keras.regularizers.l2() to enable L2 regularization

# Model v2: identical architecture to v1 (4 x Conv->BN->ReLU, stride 2, then
# a dropout-guarded dense head emitting raw logits); only the batch size of
# the training iterator differs.
model = tf.keras.Sequential([
    layers.Conv2D(16, 3, 2, padding='same', input_shape=x_batch.shape[1:], data_format="channels_last", kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(32, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(64, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(128, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(128, kernel_regularizer=reg),  # regularized for consistency with the other layers
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Dropout(0.4),
    layers.Dense(len(train_dl.class_indices), kernel_regularizer=reg)
])
#
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.003),
    # One-hot multi-class targets -> categorical cross-entropy over logits
    # (BinaryCrossentropy scored each class output independently).
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)
import time

# Train model v2 and report wall-clock time.
# fit_generator is deprecated in TF2: model.fit accepts generators directly.
start = time.time()
history = model.fit(
    train_dl,
    epochs=50,
    validation_data=valid_dl
)
f"-- Total Time Taken to train model: {(time.time() - start)/60:0.3f} Minutes"
# Summarize the final epoch's train/validation loss and accuracy for model v2.
h = f"""
-- Final Model Metrics:
> Training Loss: {history.history['loss'][-1]:0.3f} | Validation Loss: {history.history['val_loss'][-1]:0.3f}
> Training Accuracy: {history.history['accuracy'][-1]:0.3f} | Validation Accuracy: {history.history['val_accuracy'][-1]:0.3f}
"""
print(h)
We have changed the batch size to double, which will initially halve the number of steps and double the loss magnitude. The loss magnitude has doubled because of the sum reduction of the loss calculated over each image in the batch.
## Reset Batch size
# Restore the default batch size of 32.
# NOTE(review): as with v2, mutating batch_size on an existing iterator is
# fragile across Keras versions — confirm it takes effect.
train_dl.batch_size = 32

# As in v1/v2, the l2() regularizer was created and immediately replaced with
# None in the original, i.e. regularization was OFF — kept OFF here.
reg = None  # swap in tf.keras.regularizers.l2() to enable L2 regularization

# Model v3: same 4-conv-block backbone, but one dropout layer is moved from
# between the two dense layers to between the last two conv blocks, and the
# dropout before the top layer is raised from 0.4 to 0.5.
model = tf.keras.Sequential([
    layers.Conv2D(16, 3, 2, padding='same', input_shape=x_batch.shape[1:], data_format="channels_last", kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(32, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Conv2D(64, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Dropout(0.4),  # moved here from between the dense layers
    layers.Conv2D(128, 3, 2, padding='same', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Flatten(),
    layers.Dense(128, kernel_regularizer=reg),  # regularized for consistency with the other layers
    layers.BatchNormalization(),
    layers.Activation("relu"),
    #
    layers.Dropout(0.5),  # raised from 0.4 before the top layer
    layers.Dense(len(train_dl.class_indices), kernel_regularizer=reg)
])
#
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.003),
    # One-hot multi-class targets -> categorical cross-entropy over logits
    # (BinaryCrossentropy scored each class output independently).
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)
import time

# Train model v3 and report wall-clock time.
# fit_generator is deprecated in TF2: model.fit accepts generators directly.
start = time.time()
history = model.fit(
    train_dl,
    epochs=50,
    validation_data=valid_dl
)
f"-- Total Time Taken to train model: {(time.time() - start)/60:0.3f} Minutes"
# Summarize the final epoch's train/validation loss and accuracy for model v3.
h = f"""
-- Final Model Metrics:
> Training Loss: {history.history['loss'][-1]:0.3f} | Validation Loss: {history.history['val_loss'][-1]:0.3f}
> Training Accuracy: {history.history['accuracy'][-1]:0.3f} | Validation Accuracy: {history.history['val_accuracy'][-1]:0.3f}
"""
print(h)
Here we increased the dropout a little before the top layer and moved a dropout layer from between the two dense layers to between two convolution layers.
- Because of the increasing the dropout before the top layer we see better validation loss and accuracy as compared to the previous model.
- Because of moving a dropout layer from between two dense layers to between two convolutions layers we see a slight dip in the training accuracy.